#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @File  : 波士顿房价预测_L2正则化.py
# @Author: dongguangwen
# @Date  : 2025-02-06 14:44
#  0.导包
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, SGDRegressor, Lasso, Ridge
from sklearn.metrics import mean_squared_error

# 1. Load the dataset. The split below treats the last CSV column as the
#    target and every other column as a feature.
data = pd.read_csv('./data/波士顿房价.csv')
features = data.iloc[:, :-1]
target = data.iloc[:, -1]

# 2. Hold out 20% of the rows for testing; the fixed random_state keeps the
#    split reproducible from run to run.
x_train, x_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=22,
)

# 3. Standardize the features. The scaler is fitted on the training rows only
#    and then reused on the test rows, so no test-set statistics are used.
process = StandardScaler().fit(x_train)
x_train = process.transform(x_train)
x_test = process.transform(x_test)

# 4. Fit a ridge (L2-regularized) linear regression with alpha=0.01.
model = Ridge(alpha=0.01).fit(x_train, y_train)

print("模型的权重系数为:\n", model.coef_)
print("模型的偏置为:\n", model.intercept_)

# 5. Predict on the held-out rows and show the raw predictions.
y_pred = model.predict(x_test)
print(y_pred)

# 6. Score the predictions against the true targets with mean squared error.
error_mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print("L2正则化均方误差: ", error_mse)

"""
模型的权重系数为:
 [-0.73075314  1.13191003 -0.14203656  0.86277256 -2.02521786  2.72128165
 -0.16047004 -3.36638304  2.56096878 -1.67969865 -1.67605702  0.91213709
 -3.79445576]
模型的偏置为:
 22.57970297029703
[27.99616113 31.37443524 21.16298243 32.9766546  19.85389767 19.20416196
 21.07997085 19.43186366 19.56021111 32.46222725 20.9566862  27.75974513
 15.39280796 19.64454993 36.52115422 18.64375222  8.93319505 18.30049115
 30.44261688 24.30772643 19.25670252 33.77370891 29.85623034 17.68694041
 34.76754045 26.39982283 34.60386199 27.32690205 19.24585906 14.83576721
 30.57067676 15.46875951 37.09524109  7.01067985 16.15671009 17.44004021
  7.35682053 20.2269018  40.76166318 28.77751661 25.15618863 17.83867398
 39.41421543  7.01255189 22.06467346 25.18540175 20.23616288 20.29740036
 17.46264331 26.21453562  8.54216559 27.39462885 30.85062413 16.66366229
  9.32980114 35.17380795 32.1895458  21.85684025 17.42895387 21.97847318
 23.49494546 23.98592112 19.99396212 38.29362925 24.6864054  20.00388076
 13.97466778  6.75567486 42.12296778 21.90184144 16.96583918 22.41038846
 40.43796778 21.39745727 36.54078167 27.02165261 20.86543921 20.2101432
 25.23287419 22.09188026 31.02311994 20.28756349 24.25992746 31.37569813
 26.70657603 21.09911546 28.91993618 21.8208106  26.3184864  19.69509552
 25.35715854 24.45498964 20.07330977 15.74121673 15.33754499 18.65033243
 24.67387552 16.51794845 21.1640288  26.46890372 21.00199872 18.01825103]
L2正则化均方误差:  20.770938153279797
"""
